library("tidyverse")
library("patchwork")
library("viridis")
library("ggrepel")
library("DT")img_details <- read_csv("data_emilia/img_details.csv", show_col_types = FALSE)
asta_samples <- read_csv("data_emilia/asta_samples.csv", show_col_types = FALSE)
red_ind_final <- read_csv("data_emilia/red_ind_final.csv", show_col_types = FALSE) %>% rename(image_name = img_name)
img_details <- img_details %>% left_join(red_ind_final)Dataset for all images (699 images) :
red_index_by_image <- img_details %>%
group_by(sample_name) %>%
mutate(sample_redness = mean(redness_index), sample_red_pixels = mean(red_pixels_nb)) %>%
left_join(select(asta_samples, sample_name,asta_ng_mg,mass_mg), by = "sample_name") %>%
mutate(asta_ng = asta_ng_mg*mass_mg)
datatable(red_index_by_image, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T))Sample level (70 samples) :
red_index_by_sample <- img_details %>%
group_by(sample_name) %>%
summarise(sample_redness = mean(redness_index), sample_red_pixels = mean (red_pixels_nb)) %>%
left_join(asta_samples, by = "sample_name") %>%
mutate(asta_ng = asta_ng_mg*mass_mg)
datatable(red_index_by_sample, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T))Add path to images in the detailed dataset :
# Paths in
red_index_by_image$path_in_original = str_c("~/complex/ecoplast_clean_dataset/1.Original/",red_index_by_image$image_name)
red_index_by_image$path_in_calib = str_c("~/complex/ecoplast_clean_dataset/2.Calibrated/",red_index_by_image$image_name)
red_index_by_image$path_in_index = str_c("~/complex/ecoplast_clean_dataset/4.Red_pixels_highlighted/",red_index_by_image$image_name)
red_index_by_image$path_in_mask = str_c("~/complex/ecoplast_clean_dataset/3.Red_pixels_masked/",red_index_by_image$image_name)
# Paths out
red_index_by_image$path_out_original = str_c("~/complex/ecoplast_clean_dataset/results/sample_to_show/",red_index_by_image$image_name, "original.tif")
red_index_by_image$path_out_calib = str_c("~/complex/ecoplast_clean_dataset/results/sample_to_show/",red_index_by_image$image_name, "calib.tif")
red_index_by_image$path_out_index = str_c("~/complex/ecoplast_clean_dataset/results/sample_to_show/",red_index_by_image$image_name, "index.tif")
red_index_by_image$path_out_mask = str_c("~/complex/ecoplast_clean_dataset/results/sample_to_show/",red_index_by_image$image_name, "mask.tif")We want to select ten images of Figure 3.1, including three images with a known redness gradient for Fig. 3.3. The seven others are chosen randomly.
selection3 <- red_index_by_image %>% filter(image_name %in% c("2018_G_p85_5.tif", "2018_AR_p22_4.tif", "2018_AR_p21_5.tif"))
#sample7 <- sample(x = 1:699, size = 7)
#sample7_to_show <- red_index_by_image[sample7,]
#write_csv(sample7_to_show, "data_emilia/sample7_to_show.csv")
sample7_to_show <- read_csv("data_emilia/sample7_to_show.csv", show_col_types = FALSE)
sample7_to_show$path_in_original = str_c("~/complex/ecoplast_clean_dataset/1.Original/",sample7_to_show$image_name)
sample7_to_show$path_out_original = str_c("~/complex/ecoplast_clean_dataset/results/sample_to_show/",sample7_to_show$image_name, "original.tif")
# setdiff(sample7_to_show$image_name, selection3$image_name)
# -> ok they are all differentRepresentation of selected images on the relationship with raw data.
ggplot(red_index_by_image)+
# all images
geom_point(aes(x = asta_ng, y = redness_index), color = "grey50", alpha = 0.7, size = 0.5)+
# averages and smooth
geom_point(aes(x = asta_ng, y = sample_redness), color = "#01665e", size = 1.5)+
geom_smooth(aes(x = asta_ng, y = sample_redness), color = "#01665e", linewidth = 0.5, se= F)+
# selected 3 images
geom_point(aes(x = asta_ng, y = redness_index), color = "#d94801", alpha = 0.7, size = 1, data = selection3)+
geom_text_repel(aes(x = asta_ng, y = redness_index, label = image_name), color = "#d94801", data = selection3, size =3, max.overlaps = 15)+
# random sample of seven images
geom_point(aes(x = asta_ng, y = redness_index), color = "#d94801", alpha = 0.7, size = 1, data = sample7_to_show)+
geom_text_repel(aes(x = asta_ng, y = redness_index, label = image_name), color = "#d94801", data = sample7_to_show, size =3, max.overlaps = 15)+
labs(x= "Total astaxanthin content (10 individuals) (ng)", y ="Redness indices (averages for 10 images in red)")+
theme_bw(base_size=10)## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Final sample of 10 images associated with the image name on Figure 3.1.
sample7_to_show <- mutate(sample7_to_show, graphid = "sample7")
selection3 <- mutate(selection3, graphid = "selection3")
sample_to_show <- bind_rows(sample7_to_show, selection3)
image_letters <- read_delim("data_emilia/image_letters.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE, show_col_types = FALSE)
sample_to_show <- sample_to_show %>% left_join(image_letters ) %>% select(image_name, image_letter, everything())
datatable(sample_to_show, rownames = FALSE, filter="top", options = list(pageLength = 3, scrollX=T))Log (ln) transformation and initial (intuitive) representation of RI = f(TAC) :
a <- ggplot(red_index_by_image)+
# all images
geom_point(aes(y = asta_ng,x = redness_index), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(y = asta_ng, x = sample_redness), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(y = asta_ng, x = sample_redness), method = "lm", formula = 'y ~ x',color = "#01665e", linewidth = 0.5, se= F)+
# print selected image
geom_point(aes(y = asta_ng, x = redness_index), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(y = asta_ng, x = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =4, max.overlaps = 15)+
labs(y= "Total astaxanthin content (TAC) (ng) (log axis)", x ="Redness indices (log axis)")+
theme_bw(base_size=12) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
b <- ggplot(red_index_by_image)+
# all images
geom_point(aes(x = asta_ng, y = redness_index), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(x = asta_ng, y = sample_redness), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(x = asta_ng, y = sample_redness), method = "lm", formula = 'y ~ x',color = "#01665e", linewidth = 0.5, se= F)+
# print selected image
geom_point(aes(x = asta_ng, y = redness_index), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(x = asta_ng, y = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =4, max.overlaps = 15)+
labs(x= "Total astaxanthin content (TAC) (ng) (log axis)", y ="Redness indices (log axis)")+
theme_bw(base_size=12) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
a + bLarge improvement with log-log represention.
1) Linear model without log
summary(lm(asta_ng ~ sample_redness, data = red_index_by_sample))##
## Call:
## lm(formula = asta_ng ~ sample_redness, data = red_index_by_sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.600 -4.829 -2.390 2.170 32.720
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.838617 1.675392 2.888 0.00519 **
## sample_redness 0.046039 0.007382 6.237 3.26e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.663 on 68 degrees of freedom
## Multiple R-squared: 0.3639, Adjusted R-squared: 0.3545
## F-statistic: 38.9 on 1 and 68 DF, p-value: 3.263e-08
# 35%summary(lm(sample_redness ~ asta_ng, data = red_index_by_sample))##
## Call:
## lm(formula = sample_redness ~ asta_ng, data = red_index_by_sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -228.13 -68.28 -40.67 99.31 234.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 75.258 21.394 3.518 0.000781 ***
## asta_ng 7.904 1.267 6.237 3.26e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 113.5 on 68 degrees of freedom
## Multiple R-squared: 0.3639, Adjusted R-squared: 0.3545
## F-statistic: 38.9 on 1 and 68 DF, p-value: 3.263e-08
# 35%
2) Linear model with log
# Log(ln) transformation
summary(lm(log(asta_ng) ~ log(sample_redness), data = red_index_by_sample))##
## Call:
## lm(formula = log(asta_ng) ~ log(sample_redness), data = red_index_by_sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.77473 -0.46826 -0.03245 0.37502 1.70927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.81878 0.39543 -4.6 1.9e-05 ***
## log(sample_redness) 0.83076 0.08143 10.2 2.4e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7089 on 68 degrees of freedom
## Multiple R-squared: 0.6048, Adjusted R-squared: 0.599
## F-statistic: 104.1 on 1 and 68 DF, p-value: 2.396e-15
# 60%
lm_TAC_RI <- lm(log(asta_ng) ~ log(sample_redness), data = red_index_by_sample)
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(lm_TAC_RI , las = 1) # Residuals, Fitted, ...par(opar)summary(lm(log(sample_redness) ~ log(asta_ng), data = red_index_by_sample))##
## Call:
## lm(formula = log(sample_redness) ~ log(asta_ng), data = red_index_by_sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.52924 -0.41686 0.05062 0.49078 1.22072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.19852 0.17093 18.71 < 2e-16 ***
## log(asta_ng) 0.72806 0.07136 10.20 2.4e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6636 on 68 degrees of freedom
## Multiple R-squared: 0.6048, Adjusted R-squared: 0.599
## F-statistic: 104.1 on 1 and 68 DF, p-value: 2.396e-15
lm_RI_TAC <- lm(log(sample_redness) ~ log(asta_ng), data = red_index_by_sample)
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(lm_RI_TAC , las = 1) # Residuals, Fitted, ...par(opar)3) Linear model with log on all images
# Ln transformation + with all images and no averages
summary(lm(log(asta_ng) ~ log(redness_index), data = red_index_by_image))##
## Call:
## lm(formula = log(asta_ng) ~ log(redness_index), data = red_index_by_image)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0743 -0.4657 -0.0073 0.4610 3.2592
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.62011 0.11738 -5.283 1.7e-07 ***
## log(redness_index) 0.59632 0.02464 24.201 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.82 on 697 degrees of freedom
## Multiple R-squared: 0.4566, Adjusted R-squared: 0.4558
## F-statistic: 585.7 on 1 and 697 DF, p-value: < 2.2e-16
# 46% -> more variability 4) Linear model with number of red pixels instead of RI ln(TAC) = f(ln(red_pix)) -> p-value < 0.001, Adjusted R-squared = 0.5821, slope (b) = 0.94, intercept (ln(a)) = -1.17 Linear model test :
red_pix_sample <-
red_index_by_image %>%
group_by(sample_name) %>%
summarise(asta_ng = mean(asta_ng), red_pixels_nb = mean(red_pixels_nb))
summary(lm(log(asta_ng) ~ log(red_pixels_nb), data = red_pix_sample))##
## Call:
## lm(formula = log(asta_ng) ~ log(red_pixels_nb), data = red_pix_sample)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.77191 -0.44083 0.01075 0.34020 1.54881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.17789 0.34584 -3.406 0.00111 **
## log(red_pixels_nb) 0.94703 0.09611 9.854 9.92e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7237 on 68 degrees of freedom
## Multiple R-squared: 0.5881, Adjusted R-squared: 0.5821
## F-statistic: 97.1 on 1 and 68 DF, p-value: 9.924e-15
Want to check distribution of original data.
# Image redness indices
a <- ggplot(red_index_by_image)+
geom_density(aes(x = log(redness_index)), color = "black")+
labs(title="Image redness indices")
# Sample redness indices
b <- ggplot(red_index_by_image)+
geom_density(aes(x = log(sample_redness)), color = "black")+
labs(title="Sample redness indices")
# HPLC astaxanthin
c <- ggplot(red_index_by_image)+
geom_density(aes(x = log(asta_ng)), color = "black")+
labs(title="HPLC (ng)")
a + b + cDistribution is clearly not normal, but bi-modal. We can suppose an influence of the year, and/or a “real” bi-modal distribution of red vs not red copepods (between C.finmarchicus and C.glacialis, or between two water masses for example?).
Summary redness index for all images :
summary(red_index_by_image$redness_index)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6108 38.5207 116.9253 178.0560 307.8942 1046.6369
Summary redness for samples :
summary(red_index_by_image$sample_redness)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.53 44.83 131.24 178.06 322.14 442.70
Summary HPLC for samples :
summary(red_index_by_image$asta_ng)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3435 4.0698 10.7422 13.0184 18.2080 47.9790
Add year as the shape of points on the graph.
a <- ggplot(red_index_by_image)+
# all images
geom_point(aes(x = asta_ng, y = redness_index, shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(x = asta_ng, y = sample_redness, shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(x = asta_ng, y = sample_redness), method = "lm", color = "#01665e", formula = 'y ~ x',linewidth = 0.5, se= F)+
# print selected image
geom_point(aes(x = asta_ng, y = redness_index, shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(x = asta_ng, y = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size = 4, max.overlaps = 15)+
labs(x= "Total astaxanthin content (TAC) (ng) (log axis)", y ="Redness indices (log axis)")+
theme_bw(base_size=12) + theme(legend.position = "none") +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
b <- ggplot(red_index_by_image)+
# all images
geom_point(aes(y = asta_ng,x = redness_index, shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(y = asta_ng, x = sample_redness, shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(y = asta_ng, x = sample_redness), method = "lm", color = "#01665e", formula = 'y ~ x',linewidth = 0.5, se= F)+
# print selected image
geom_point(aes(y = asta_ng, x = redness_index, shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(y = asta_ng, x = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =4, max.overlaps = 15)+
labs(y= "Total astaxanthin content (TAC) (ng) (log axis)", x ="Redness indices (log axis)", shape = "Year")+
theme_bw(base_size=12) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
a + bggplot(red_index_by_image)+
# all images
geom_point(aes(y = asta_ng,x = redness_index,shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(y = asta_ng, x = sample_redness,shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(y = asta_ng, x = sample_redness), method = "lm", formula = 'y ~ x',color = "#01665e", linewidth = 0.5, se= F)+
# print selected image
geom_point(aes(y = asta_ng, x = redness_index,shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(y = asta_ng, x = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =6, max.overlaps = 15)+
labs(y= "TAC (ng) (log-transformed)", x ="Redness indices (mm2) (log-transformed)", shape ="Year")+
theme_bw(base_size=14) +
scale_y_continuous(trans = "log", labels = scales::number_format(accuracy = 0.01))+
scale_x_continuous(trans = "log", labels = scales::number_format(accuracy = 0.01))ggsave("figures/HPLC_redness_log.pdf", width=9, height =5)We can see that points are often, but not systemically, grouped by year.
# Image redness indices
a <- ggplot()+
geom_density(aes(x = log(redness_index)), color = "black", data = red_index_by_image, size = 1.5)+
geom_density(aes(x = log(redness_index)), data = filter(red_index_by_image, year == 2018), color = "violetred")+
geom_density(aes(x = log(redness_index)), data = filter(red_index_by_image, year == 2019), color = "yellowgreen")+
labs(title="Image redness indices")## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
# Sample redness indices
b <- ggplot(red_index_by_image)+
geom_density(aes(x = log(sample_redness)),color = "black", data = red_index_by_image, size = 1.5)+
geom_density(aes(x = log(sample_redness)), data = filter(red_index_by_image, year == 2018), color = "violetred")+
geom_density(aes(x = log(sample_redness)), data = filter(red_index_by_image, year == 2019), color = "yellowgreen")+
labs(title="Sample redness indices")
# HPLC astaxanthin
c <- ggplot(red_index_by_image)+
geom_density(aes(x = log(asta_ng)), color = "black", data = red_index_by_image, size = 1.5)+
geom_density(aes(x = log(asta_ng)), data = filter(red_index_by_image, year == 2018), color = "violetred")+
geom_density(aes(x = log(asta_ng)), data = filter(red_index_by_image, year == 2019), color = "yellowgreen")+
labs(title="HPLC (ng)")
a + b + c Black = both years, Green = 2019, Purple = 2018
Obvious bi-modal distribution influenced by the year (copepods less red in 2018), coherent between HPLC and imaging methods. We can also see a small bi-modal distribution within year 2018. Is it a problem ? I think not if residuals are normal, which is the case (see next paragraph).
a <- ggplot()+
geom_density(aes(x = lm_RI_TAC$residuals), color = "darkblue")
shapiro.test(lm_RI_TAC$residuals)##
## Shapiro-Wilk normality test
##
## data: lm_RI_TAC$residuals
## W = 0.97208, p-value = 0.1195
b <- ggplot()+
geom_density(aes(x = lm_TAC_RI$residuals), color = "darkblue")
shapiro.test(lm_TAC_RI$residuals)##
## Shapiro-Wilk normality test
##
## data: lm_TAC_RI$residuals
## W = 0.98049, p-value = 0.3454
a+bResiduals are normally distributed in both senses of the relationship.
I gathered two types of outliers in the models : - maximum leverage = max Cook distance : green - maximum residuals (based on diagnostic graphs) : blue
leverage <- data.frame(lev_TAC_RI = hatvalues(lm_TAC_RI), lev_RI_TAC = hatvalues(lm_RI_TAC))
red_index_by_sample <- bind_cols(red_index_by_sample, leverage)outliers_lev_TAC_RI <- red_index_by_sample %>% filter(lev_TAC_RI > 0.04)
outliers_res_TAC_RI <- red_index_by_sample %>% filter(rownames(red_index_by_sample) %in% c("16", "20", "41"))
ggplot(red_index_by_image)+
# all images
geom_point(aes(y = asta_ng,x = redness_index, shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(y = asta_ng, x = sample_redness, shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(y = asta_ng, x = sample_redness), method = "lm", color = "#01665e", formula = 'y ~ x',size = 0.5, se= F)+
# print selected image
geom_point(aes(y = asta_ng, x = redness_index, shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(y = asta_ng, x = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =6, max.overlaps = 15)+
# outliers
geom_point(aes(y = asta_ng, x = sample_redness), shape = "x", color = "green", size = 4, data = outliers_lev_TAC_RI)+
geom_point(aes(y = asta_ng, x = sample_redness), shape = "x", color = "blue", size = 4, data = outliers_res_TAC_RI)+
#labs etc
labs(y= "Total astaxanthin content (TAC) (ng) (log axis)", x ="Redness indices (log axis)")+
theme_bw(base_size=10) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")outliers_lev_RI_TAC <- red_index_by_sample %>% filter(lev_RI_TAC > 0.045)
outliers_res_RI_TAC <- red_index_by_sample %>% filter(rownames(red_index_by_sample) %in% c("16", "20", "21"))
ggplot(red_index_by_image)+
# all images
geom_point(aes(x = asta_ng, y = redness_index, shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(x = asta_ng, y = sample_redness, shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(x = asta_ng, y = sample_redness), method = "lm", color = "#01665e", formula = 'y ~ x',size = 0.5, se= F)+
# print selected image
geom_point(aes(x = asta_ng, y = redness_index, shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(x = asta_ng, y = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =6, max.overlaps = 15)+
# outliers
geom_point(aes(x = asta_ng, y = sample_redness), shape = "x", color = "green", size = 4, data = outliers_lev_RI_TAC)+
geom_point(aes(x = asta_ng, y = sample_redness), shape = "x", color = "blue", size = 4, data = outliers_res_RI_TAC)+
#labs etc
labs(x= "Total astaxanthin content (TAC) (ng) (log axis)", y ="Redness indices (log axis)")+
theme_bw(base_size=10) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
red_index_by_image2 <- red_index_by_image %>% filter(!(sample_name %in% c("AT_p116", "AT_p117", "AT_p118", "AT_p119", "AT_p120", "AT_p105")))
red_index_by_sample2 <- red_index_by_sample %>% filter(!(sample_name %in% c("AT_p116", "AT_p117", "AT_p118", "AT_p119", "AT_p120", "AT_p105")))a <- ggplot(red_index_by_image2)+
# all images
geom_point(aes(y = asta_ng,x = redness_index, shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(y = asta_ng, x = sample_redness, shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(y = asta_ng, x = sample_redness), method = "lm", color = "#01665e", formula = 'y ~ x',size = 0.5, se= F)+
# print selected image
geom_point(aes(y = asta_ng, x = redness_index, shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(y = asta_ng, x = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =4, max.overlaps = 15)+
#labs etc
labs(y= "Total astaxanthin content (TAC) (ng) (log axis)", x ="Redness indices (log axis)", shape = "Year")+
theme_bw(base_size=12) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
b<- ggplot(red_index_by_image2)+
# all images
geom_point(aes(x = asta_ng, y = redness_index, shape = factor(year)), color = "grey60", alpha = 0.7, size = 1)+
# averages and smooth
geom_point(aes(x = asta_ng, y = sample_redness, shape = factor(year)), color = "#01665e", size = 2)+
#geom_smooth(aes(x = asta_ng, y = sample_redness_log), color = "grey50", size = 0.5, se= F)+
geom_smooth(aes(x = asta_ng, y = sample_redness), method = "lm", color = "#01665e", formula = 'y ~ x', size = 0.5, se= F)+
# print selected image
geom_point(aes(x = asta_ng, y = redness_index, shape = factor(year)), color = "#d94801", size = 1.5, data = sample_to_show)+
geom_text_repel(aes(x = asta_ng, y = redness_index, label = image_letter), color = "#d94801", data = sample_to_show, size =4, max.overlaps = 15)+
labs(x= "Total astaxanthin content (TAC) (ng) (log axis)", y ="Redness indices (log axis)", shape = "Year")+
theme_bw(base_size=12) +
scale_y_continuous(trans = "log")+
scale_x_continuous(trans = "log")
a+bln(TAC) = f(ln(RI)) -> p-value < 0.001, Adjusted R-squared = 0.74, slope (b) = 0.91, intercept (ln(a)) = -2.64 so a = 0.096 ln(RI) = f(ln(TAC)) -> p-value < 0.001, Adjusted R-squared = 0.7405, slope (b) = 0.81, intercept (ln(a)) = 3.13, so a = 22.8
summary(lm(log(asta_ng) ~ log(sample_redness), data = red_index_by_sample2))##
## Call:
## lm(formula = log(asta_ng) ~ log(sample_redness), data = red_index_by_sample2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.50193 -0.40344 0.02598 0.36584 1.43693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.34663 0.33426 -7.02 1.98e-09 ***
## log(sample_redness) 0.91361 0.06795 13.45 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5787 on 62 degrees of freedom
## Multiple R-squared: 0.7446, Adjusted R-squared: 0.7405
## F-statistic: 180.8 on 1 and 62 DF, p-value: < 2.2e-16
summary(lm(log(sample_redness) ~ log(asta_ng), data = red_index_by_sample2))##
## Call:
## lm(formula = log(sample_redness) ~ log(asta_ng), data = red_index_by_sample2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.62971 -0.38337 -0.00697 0.34627 1.13108
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.13910 0.14135 22.21 <2e-16 ***
## log(asta_ng) 0.81506 0.06062 13.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5466 on 62 degrees of freedom
## Multiple R-squared: 0.7446, Adjusted R-squared: 0.7405
## F-statistic: 180.8 on 1 and 62 DF, p-value: < 2.2e-16